
*****************************************************************************************

* This file imports and cleans the town demographic data from IPUMS NHGIS

*****************************************************************************************

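/* Steps in this file

1. import raw data
2. label variables (from the descriptive row under the header) and convert them to numeric
3. rename variables and drop duplicate place records
4. construct variables (e.g. percent white = white pop/total pop)
5. save the cleaned data

*/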


** set directory (file paths below are relative to the current working directory)

* import 
import delimited "nhgis_master.csv", varnames(1) case(preserve) clear 

drop GJOIN* STATE STATENH NHGISCODE
drop NAME*

* label data
qui{

foreach var of varlist * {
  label variable `var' "`=`var'[1]'"
  replace `var'="" if _n==1
  destring `var', replace
}

}

* rename variables
rename STATEFP statefips
rename PLACEA placefips
drop if statefips ==.
duplicates tag statefips placefips, gen(dup_flag)
drop if dup_flag == 1 & AV0AA1990 ==.
drop dup_flag

drop *125M*

rename *1970 *70
rename *1980 *80
rename *1990 *90
rename *2000 *00
rename *2010 *10

rename AV0AA125 AV0AA10_ACS

rename *125 *10

rename AV0AA* tot_pop*
rename AB2AA* median_hh_income*
rename BD5AA* per_cap*
rename CL6AA* people_in_poverty*

rename A57AA* urban_tot_pop*
rename A57AB* urban_area_pop*
rename A57AC* urban_cluster_pop*
rename A57AD* rural_pop*
rename B18AA* white_pop*
rename B18AB* black_pop*
rename B18AC* aian_pop*
rename B18AD* apio_pop*
drop B79AA*
drop AX6AA*
rename BS7AA* hh_less_10k*
rename BS7AB* hh_10k_15k*
rename BS7AC* hh_15k_25k*
rename BS7AD* hh_25k_plus*
rename A41AA* tot_units*
rename A43AA* occ_units*
rename A43AB* vac_units*
rename B37AA* owner_occ_units*
rename B37AB* rent_units*

* drop state-level observations so we don't double count
drop if statefips ==.
duplicates tag statefips placefips, gen(dup_flag)
drop if dup_flag == 1 & tot_pop00 ==.
drop dup_flag

* construct variables
gen pct_poverty70 = people_in_poverty70/tot_pop70
gen pct_poverty80 = people_in_poverty80/tot_pop80
gen pct_poverty90 = people_in_poverty90/tot_pop90
gen pct_poverty00 = people_in_poverty00/tot_pop00
gen pct_poverty10 = people_in_poverty10/tot_pop10

local year_list 70 80 90 00 10
foreach year in `year_list'{

	gen pct_white`year'=white_pop`year'/tot_pop`year'
	gen pct_black`year'=black_pop`year'/tot_pop`year'	
	gen vac_rate`year'=vac_units`year'/tot_units`year'
	gen pct_owned`year'=owner_occ_units`year'/occ_units`year'
	gen lpop`year'=log(tot_pop`year')

}

rename tot_pop* pop*

drop *_pop* hh_* people_in* *units*

* save final file
save "nhgis_data.dta", replace



